// ANES20Ethnicity

* Close any log left open by an earlier, interrupted run; otherwise
* -log using- stops because a log file is already open.
* -capture- turns this into a no-op when no log is open.
capture log close
log using ANES20Ethnicity.log, replace

* Data: American National Election Study 2020 Time Series Study, Feb 10, 2022 Version.
* Date accessed: March 09, 2025.
* Accessed from https://electionstudies.org/data-center/2020-time-series-study/
* -clear- allows the script to be re-run while a dataset is already in memory
* (without it -use- refuses to replace unsaved data).
use "C:\Users\sbstjp\OneDrive - Cardiff University\anes_timeseries_2020_stata_20220210.dta", clear

// Social justice scale
* Give the four scale items readable names in one step
rename (V201411x V201626 V202183 V202174) (tgpolicy offence metoo blm)

* Set the non-substantive codes of each item to missing
replace tgpolicy = . if tgpolicy == -2
replace offence = . if offence == -5 | offence == -9
* metoo and blm share the same list of codes to discard
foreach item in metoo blm {
    replace `item' = . if inlist(`item', -9, -7, -6, -5, -4, 998, 999)
}

*Reverse code offence so social justice values are high
* r(max) is the largest non-missing value of offence, so the highest code maps to 1
* and the lowest to the former maximum; a missing offence stays missing in roffence.
summarize offence, meanonly
gen roffence = r(max) + 1 - offence

*Rescale each item to run from 1 (its observed minimum) to 2 (its observed maximum).
*The range starts at 1 rather than 0 so that no rescaled value is ever 0.
tempname lowest spread
foreach item of varlist tgpolicy roffence metoo blm {
    summarize `item'
    * Keep the observed minimum and range in temporary scalars for the rescaling below
    scalar `lowest' = r(min)
    scalar `spread' = r(max) - r(min)
    gen s`item' = 1 + (`item' - `lowest') / `spread'
}

* Build the scale as the mean of whichever rescaled items each respondent answered.
* The egen row functions skip missing items on their own, so the item variables keep
* their missing values and no sentinel recoding (missing -> 0) is needed.
local sjv_items stgpolicy sroffence smetoo sblm

* Sum of the answered items (rowtotal treats a missing item as 0, so a respondent
* with no answered item gets a total of 0)
egen total_scoreSJV = rowtotal(`sjv_items')

* Number of answered (non-missing) items per respondent
egen count_nonzeroSJV = rownonmiss(`sjv_items')

* Average of the answered items; rowmean yields missing when no item was answered,
* so there is no division by zero to guard against
egen SocJusValues = rowmean(`sjv_items')

// Demographic variables
* Rename age and income, then set their non-substantive codes to missing
rename (V201507x V201617x) (age income)
replace age = . if age == -9
replace income = . if income == -9 | income == -5

* Rename the sex item and drop its refusal code.
rename V201600 FemaleGender
replace FemaleGender = . if FemaleGender == -9
* Turn the raw 1/2 coding into a true 0/1 indicator so the variable matches its name
* and the other dummies. The regression slope is unaffected; only the intercept shifts.
* NOTE(review): assumes 1 = male and 2 = female in V201600 - confirm against the codebook.
recode FemaleGender (1 = 0) (2 = 1)

* Education and race: rename, then set the non-substantive codes to missing
rename (V201510 V201549x) (education race)
replace education = . if inlist(education, -9, -8, 95)
replace race = . if race == -9 | race == -8

*Create dummy variables
* Graduate: 1 for education codes 6-8, 0 for codes below 6, missing otherwise.
* A missing education fails "education <= 8" because Stata treats missing as larger
* than any number, so it stays missing here.
gen Graduate = (education >= 6) if education <= 8

* Race indicators: each is 1 for its own race code, 0 for the other codes in 1-6,
* and missing when race is missing. The -if- qualifier restricts each indicator to
* the codes that receive a 0 or 1.

* Black: race code 2
gen Black = (race == 2) if inlist(race, 1, 2) | inrange(race, 3, 6)

* Hispanic: race code 3
gen Hispanic = (race == 3) if inlist(race, 1, 2, 3) | inrange(race, 4, 6)

* AsianAndOther: race code 4
* NOTE(review): only code 4 is flagged; codes 5 and 6 are coded 0 - confirm this is
* the intended meaning of "AndOther".
gen AsianAndOther = (race == 4) if inrange(race, 1, 3) | inlist(race, 4, 5, 6)

* White: race code 1
gen White = (race == 1) if race == 1 | inrange(race, 2, 6)

// Standardize variables
* z-score the continuous predictors; each standardized copy takes the capitalised
* name of its source (Stata variable names are case-sensitive, so age and Age coexist)
local sources age income
local targets Age Income
forvalues i = 1/2 {
    local src : word `i' of `sources'
    local dst : word `i' of `targets'
    egen `dst' = std(`src')
}

// Regressions
* One weighted OLS model per race indicator, all sharing the same controls.
* -eststo- / -esttab- come from the user-written estout package.

* Drop estimates stored by any earlier run in this session; without this, -eststo-
* keeps appending and -esttab- would also print the stale models.
eststo clear

* NOTE(review): pweights give robust standard errors but use no strata/PSU design
* information; consider -svyset- if the study documentation provides those variables.
foreach group in White Black Hispanic AsianAndOther {
    regress SocJusValues Age FemaleGender Graduate Income `group' [pweight=V200010b], robust
    eststo
}

* Side-by-side table of the four stored models
esttab

//Correlations
* Weighted pairwise correlations between the scale and each race indicator,
* reported with significance levels
local corr_vars SocJusValues White Black Hispanic AsianAndOther
pwcorr `corr_vars' [aweight=V200010b], sig

* End of the analysis: stop logging
log close